package net.ming616.nlp.extraction.service.impl;

import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import net.ming616.nlp.extraction.model.JingdongProduct;
import net.ming616.nlp.extraction.service.HTMLExtractor;
import net.ming616.nlp.extraction.service.JingdongExtractor;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * {@link JingdongExtractor} implementation that downloads a page with jsoup
 * and reads product data out of it using CSS selectors.
 *
 * <p>
 * Both public methods are best-effort: when the page cannot be downloaded or
 * parsed, the failure is logged and an empty result is returned instead of an
 * exception being propagated.
 */
@Service("jingdongExtractor")
public class JingdongExtractorImpl implements JingdongExtractor {
	/**
	 * Logger for this class
	 */
	private static final Logger logger = Logger
			.getLogger(JingdongExtractorImpl.class);

	/** Timeout, in milliseconds, handed to jsoup when fetching a page. */
	private static final int FETCH_TIMEOUT_MILLIS = 6000;

	@Autowired
	HTMLExtractor htmlExtractor;

	/**
	 * Fetches a product page and extracts the product name and its selectable
	 * attributes.
	 *
	 * <p>
	 * The name is the text of the first element matching
	 * {@code #select .mt h1}. Each {@code #select dl} element contributes one
	 * attribute: the key comes from its {@code dt} label and the values are the
	 * texts of its {@code dd div:not(.curr)} elements.
	 *
	 * @param productURL
	 *            address of the product page
	 * @return the extracted product; a product with nothing filled in when the
	 *         page cannot be fetched or contains no name element
	 */
	public JingdongProduct getProduct(String productURL) {
		JingdongProduct product = new JingdongProduct();
		Document doc = fetchDocument(productURL);
		if (doc == null) {
			// The fetch failure has already been logged; nothing to extract.
			return product;
		}
		Element nameElement = doc.select("#select .mt h1").first();
		if (nameElement == null) {
			return product;
		}
		product.setName(nameElement.text());
		for (Element attribute : doc.select("#select dl")) {
			Element label = attribute.select("dt").first();
			if (label == null) {
				// A group without a label cannot be keyed; skip it rather
				// than fail the whole extraction.
				continue;
			}
			List<String> valueList = new ArrayList<String>();
			for (Element value : attribute.select("dd div:not(.curr)")) {
				valueList.add(value.text());
			}
			product.getAttributes().put(attributeKey(label.text()), valueList);
		}
		return product;
	}

	/**
	 * Fetches a listing page and collects its entries.
	 *
	 * <p>
	 * Every element matching {@code #sortlist .item li} yields one entry whose
	 * key is the element text and whose value is the {@code href} of the link
	 * found inside it. Entries with identical text overwrite each other.
	 *
	 * @param baseURL
	 *            address of the listing page
	 * @return map from entry text to link target; empty when the page cannot
	 *         be fetched or has no matching entries
	 */
	public Map<String, String> getProductURLList(String baseURL) {
		Map<String, String> itemMap = new HashMap<String, String>();
		Document doc = fetchDocument(baseURL);
		if (doc == null) {
			// The fetch failure has already been logged; return the empty map.
			return itemMap;
		}
		for (Element item : doc.select("#sortlist .item li")) {
			itemMap.put(item.text(), item.select("a").attr("href"));
		}
		return itemMap;
	}

	/**
	 * Downloads and parses the page at the given address.
	 *
	 * @param url
	 *            page address
	 * @return the parsed document, or {@code null} when the address is invalid
	 *         or the download/parse fails (the failure is logged with its
	 *         stack trace)
	 */
	private Document fetchDocument(String url) {
		try {
			return Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);
		} catch (Exception e) {
			// Pass the exception itself so the stack trace is not lost.
			logger.error("Failed to fetch " + url + ": " + e.getMessage(), e);
			return null;
		}
	}

	/**
	 * Derives the attribute key from a {@code dt} label by cutting it at the
	 * first ASCII or full-width (U+FF1A) colon and trimming whitespace.
	 *
	 * <p>
	 * NOTE(review): the previous code called
	 * {@code StringUtils.substringBefore(label, "")}, which returns an empty
	 * string for every label, so all attributes collided on one key.
	 * Presumably the separator was a colon lost to an encoding problem --
	 * confirm against the live page markup.
	 *
	 * @param label
	 *            text of the {@code dt} element, never {@code null}
	 * @return the label up to (not including) the first colon, trimmed; the
	 *         whole trimmed label when it contains no colon
	 */
	private static String attributeKey(String label) {
		String key = StringUtils.substringBefore(label, ":");
		key = StringUtils.substringBefore(key, "\uFF1A");
		return key.trim();
	}

}
